import os, sys, time
import requests
from lxml import etree

pwd = os.getcwd()
sys.path.append(pwd)
from proxy import ValidIp

# Directory that save_html writes page files into. The file name is appended
# by plain string concatenation, so the trailing slash is required.
html_dir = '/home/ubuntu/workspace/spider_data/qichezhijia/html2/'

# 1. Save the HTML of a page to disk
def save_html(number, content):
    """Append ``content`` to the file named ``number`` inside ``html_dir``.

    Args:
        number: Identifier used as the file name (converted with ``str``).
        content: Text to write; it is encoded as UTF-8.
    """
    # Write the text the caller passed in instead of reading a global
    # ``response`` object, so the function does not depend on script state.
    with open(html_dir + str(number), 'a', encoding='utf-8') as f:
        f.write(content)

# URL template for one configuration page; {} is replaced by the numeric id.
base_url = 'https://car.autohome.com.cn/config/spec/{}.html'
# base_url = 'https://car.autohome.com.cn/config/series/{}.html'
# Seconds to wait for a response before treating the request as failed, so a
# dead proxy cannot stall the crawl indefinitely.
request_timeout = 30
proxies = ValidIp()
for num in range(0, 50000):
    url = base_url.format(num)
    try:
        response = requests.get(url, proxies=proxies, timeout=request_timeout)
    except requests.RequestException:
        # Only request-level failures trigger a retry; KeyboardInterrupt and
        # SystemExit are no longer swallowed. Get a new proxy configuration
        # and retry once; a second failure propagates and stops the script.
        proxies = ValidIp()
        response = requests.get(url, proxies=proxies, timeout=request_timeout)
    html = etree.HTML(response.text)
    result = html.xpath('//div[@class="pzbox"]')
    # youzaishou2.txt: ids whose page contains a "pzbox" div
    #   (original note: ids of discontinued cars that have parameters).
    # wuzaishou2.txt: ids whose page has no "pzbox" div
    #   (original note: ids of discontinued cars without parameters).
    # Both files are opened in append mode on every iteration and closed by
    # the with-statement, even if printing or saving the page raises.
    with open('youzaishou2.txt', 'a') as zaishou, \
            open('wuzaishou2.txt', 'a') as tingshou:
        if not result:
            tingshou.write(str(num) + ',')
        else:
            print(result[0], num)
            zaishou.write(str(num) + ',')
            save_html(num, response.text)
    